# Download MathPile
# Fetches the GAIR/MathPile dataset repo from the Hugging Face Hub into
# $MATHPILE_PATH as real files (symlinks disabled), resuming partial downloads.
MATHPILE_PATH="./mathpile"
huggingface-cli download --resume-download --repo-type dataset GAIR/MathPile \
  --local-dir "$MATHPILE_PATH" --local-dir-use-symlinks False

# Decompress every *.gz under the train split in place.
# The directory is handed to find directly instead of cd-ing into it: if the
# directory is missing, find simply reports an error rather than silently
# decompressing .gz files in whatever the current directory happens to be.
# `+` batches many files into each gzip invocation.
find "$MATHPILE_PATH/train" -type f -name "*.gz" -exec gzip -d {} +

# Prepare data for synthesis
# SAVE_DIR is a placeholder; set it to a real path before running. It is passed
# as the first argument to build_cot_data.sh, followed by the dataset path.
SAVE_DIR=/path/to/data
bash sh/synthesis/build_cot_data.sh "$SAVE_DIR" "$MATHPILE_PATH"